
***********************************************************
* Merge Census housing-start data with CoreLogic, construct a CoreLogic measure of new housing starts, and plot both series over time.
***********************************************************

* Root folder for the input data and all outputs (edit per machine).
local file_path_in "/tochange/"

use "`file_path_in'//house county/rawHOUSEBUILDERall.dta" , clear

* Discard the stored nsold and count every observation as exactly one sale,
* so that (sum) nsold below is an observation count per cell.
drop nsold
gen nsold=1

** County-by-date panel: average of the units* variables and number of sales.
collapse (mean) units* (sum) nsold, by(FIPS_CODE year date)

* Keep only cells that have a unitstl value to compare against.
keep if !missing(unitstl)

* Absolute gap between units1 and the sale count, averaged within county.
* NOTE(review): this uses units1 while the ratio below uses unitstl -- confirm
* that the mismatch is intended.
gen gap = abs(units1-nsold)
bysort FIPS_CODE: egen avggap=mean(gap)

* Ratio of the sale count to unitstl (missing when unitstl is 0), averaged
* within county; egen mean() ignores the missing cells.
gen gappc = nsold/unitstl
bysort FIPS_CODE: egen avggappc=mean(gappc)

* Median and 95th percentile of the county-average ratio. The summary runs
* over county-date rows, so each county is weighted by its number of rows.
summ avggappc, d
local dnthres = r(p50)
local upthres = r(p95)

gen dnthres = `dnthres'
gen upthres = `upthres'

* highmatch = 1 for counties whose average ratio exceeds the median, else 0.
* Stata orders missing above every number, so an unguarded "avggappc>dnthres"
* would flag counties whose ratio is never defined; exclude them explicitly.
gen highmatch = (avggappc > dnthres) & !missing(avggappc)

save "`file_path_in'//nsoldbycounty.dta", replace 

* Reload the county-by-date file so the analysis below can be run from here.
* -use- takes the option clear (not replace) to discard the data in memory.
use "`file_path_in'//nsoldbycounty.dta", clear

* Median and 95th percentile of the county-average sales/unitstl ratio.
summ avggappc, d
local dnthres = r(p50)
local upthres = r(p95)

** Aggregate comparison using units1 + units2 + units3
* Work on a temporary copy; the county-level data come back at -restore-.
preserve
	* Totals across all counties for each date.
	collapse (sum) nsold units1 units2 units3, by(date year)
	gen units = units1+units2+units3

	* Legend text for the two series.
	label var units "Census"
	label var nsold "Corelogic"

	* Correlation between the two aggregate series.
	correlate nsold units

	* Rescale both series to thousands for the plot.
	foreach series in nsold units {
		replace `series' = `series' / 1000
	}

	twoway line nsold units date, ytitle("New Homes (Thous)") xtitle("Year-Month") lpattern(solid dash) graphregion(color(white))

	* Write the figure in both formats.
	foreach fmt in eps png {
		graph export "`file_path_in'//reg output//nsold_byFips_13u.`fmt'", replace
	}
restore
